"""
Abstract base class for file integrity verifiers.

Defines the interface for file-type-specific integrity verification.
Concrete implementations specify which files to verify and policies.
"""

from abc import ABC, abstractmethod
from enum import Enum
from pathlib import Path
import hashlib


class FileType(str, Enum):
    """Enum for file types - ensures consistency across the codebase"""

    FAISS_INDEX = "faiss_index"
    PDF = "pdf"
    EXPORT = "export"


class BaseFileVerifier(ABC):
    """
    Base class for file integrity verification.

    Subclasses implement file-type-specific logic for:
    - Identifying which files they handle
    - Defining verification policies
    - Optionally customizing checksum algorithms
    """

    @abstractmethod
    def should_verify(self, file_path: Path) -> bool:
        """
        Determine if this verifier handles the given file.

        Args:
            file_path: Path to file to check

        Returns:
            True if this verifier should handle this file type
        """
        pass

    @abstractmethod
    def get_file_type(self) -> FileType:
        """
        Get the file type identifier for this verifier.

        Returns:
            FileType enum value
        """
        pass

    @abstractmethod
    def allows_modifications(self) -> bool:
        """
        Whether this file type can be legitimately modified by users.

        Returns:
            True if users can modify files (e.g., PDFs with annotations)
            False if files should never be manually modified (e.g., FAISS indexes)
        """
        pass

    def calculate_checksum(self, file_path: Path) -> str:
        """
        Calculate SHA256 checksum of file.

        Can be overridden by subclasses for different algorithms.

        Args:
            file_path: Path to file to checksum

        Returns:
            Hex string of checksum

        Raises:
            FileNotFoundError: If file doesn't exist
            IOError: If file can't be read
        """
        sha256_hash = hashlib.sha256()
        with open(file_path, "rb") as f:
            # Read in chunks to handle large files efficiently
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    def get_algorithm(self) -> str:
        """
        Get the checksum algorithm name.

        Returns:
            Algorithm identifier (default: 'sha256')
        """
        return "sha256"
